################### profilometer ############################################################
# Here I analyze the profilometer data.
# I calculate the mean thickness and the bottom thickness.
# For flexible substrates, it is generally impossible to level the entire sample.
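# In this version, the leveled interval (in um) must be indicated in the file names:
# samplename_x1-x2um.csv (a trailing date still allows the interval to be extracted,
# but such files are not grouped per sample by the current regular expressions)
# Example: LH-PU17_paper_CR(1)_3000-6000um.csv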
# The script subsequently extracts these intervals.
# Update on 20/10/2021: added RMS roughness

## !! Warning: only works properly when there are no other csv files in the folder (such as the statistics output)

################### Input variables - edit before running script ####################################
## Clean environment
# NOTE: the former `rm(list = ls())` was removed on purpose. It silently deleted
# every object in the workspace of whoever sourced this script, and the visible
# script assigns each of its variables before reading them, so it is not needed.
# Restart the R session instead if a truly clean environment is required.

## Input variables
infolder <- "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH-PU20_glass/Raw" # input folder excl final /
outfolder <- "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH-PU20_glass/Output/" # output folder incl final /

resolution <- 0.833278 # scan resolution in um (used to convert um positions into row indices)
peak_thresh <- 10 # height threshold in um; peaks above it are treated as aggregates/unexfoliated flakes

#################### Don't edit below unless you know what you're doing #############################

## Init
library(ggplot2)
library(latex2exp) # To use latex in plots
library(stringr) # package for regular expressions
library(quantmod) # findValleys/findPeaks
library(stringr) # package for regular expressions
library(quantmod) # findValleys/findPeaks

# Palette for the plots; the script's plots take their line colours from entry 2 onwards.
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

## Read data
# `pattern` is a regular expression, not a shell glob. The previous "*.csv" was
# unanchored and therefore also matched names such as "data.csv.bak"; anchor on
# the literal ".csv" extension instead.
files <- list.files(infolder, pattern = "\\.csv$", full.names = TRUE, recursive = FALSE)

## Create output folder
# recursive = TRUE also creates missing parent folders; showWarnings = FALSE
# silences the "already exists" warning on every re-run.
dir.create(outfolder, showWarnings = FALSE, recursive = TRUE)

## Create empty lists to store the data
# One slot per input file; entries of skipped files stay NULL.
sample_lst <- vector("list", length(files))        # sample name without the leveled range
sampleFull_lst <- vector("list", length(files))    # file name including range and extension
thickness_lst <- vector("list", length(files))     # mean height
std_lst <- vector("list", length(files))           # sd of height
sem_lst <- vector("list", length(files))           # standard error of the mean height
baseline_lst <- vector("list", length(files))      # mean of the selected valleys
baseline_sd_lst <- vector("list", length(files))
baseline_sem_lst <- vector("list", length(files))
peaks_mean_lst <- vector("list", length(files))    # mean of the peaks above peak_thresh
peaks_sd_lst <- vector("list", length(files))
peaks_sem_lst <- vector("list", length(files))
n_lst <- vector("list", length(files))             # number of non-NA height points
npeaks_lst <- vector("list", length(files))        # number of peaks above peak_thresh
nbaseline_lst <- vector("list", length(files))     # number of selected valleys
roughness_rms_lst <- vector("list", length(files)) # RMS roughness

## Calculations

# Render a ggplot object into a single-page PDF at `path`.
# on.exit() closes the graphics device even when printing the plot fails, so a
# failing plot does not leave a dangling open device behind.
save_profile_pdf <- function(plot, path, width = 7 * 16 / 9, height = 7) {
  pdf(path, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# For every input file: extract the leveled range from the file name, compute
# thickness / peak / baseline / roughness statistics on that range, store them
# in the *_lst lists at index i, and write four PDF plots to `outfolder`.
for (i in seq_along(files)) { # seq_along: no iteration at all when `files` is empty
  # Ignore statistics files. Only the file name is tested (a folder called
  # "...stats..." must not skip everything), and "statistics" is listed
  # explicitly because the plain pattern "stats" does not match "statistics.csv".
  if (str_detect(basename(files[i]), "stats|statistics")) {
    next
  }

  ## Extract sample name
  sampleName <- str_extract(files[i], "[a-zA-Z0-9()\\-\\_]*\\.csv$") # file name incl. ".csv"

  ## Extract leveled sample area, written as "<x1>-<x2>um" in the file name
  leveled_range <- str_extract(basename(files[i]), "[0-9]+\\-[0-9]+um")
  if (is.na(leveled_range)) {
    # Previously this surfaced later as an obscure "NA/NaN argument" error.
    stop("No leveled range of the form '<x1>-<x2>um' found in file name: ",
         files[i], call. = FALSE)
  }
  x1 <- as.numeric(str_extract(leveled_range, "^[0-9]+"))      # first number: start of range (um)
  x2 <- as.numeric(str_extract(leveled_range, "[0-9]+(?=um)")) # second number (followed by "um"): end of range (um)
  sampleNameOnly <- str_sub(sampleName, 1, -5)                 # drop the ".csv" extension
  sampleName_norange <- str_replace(sampleName, "[0-9()\\-]*um.csv$", "") # key used to group repeated measurements

  ## read file
  profil <- read.csv(file = files[i], header = TRUE, sep = ",", dec = ".", skip = 22) # first 22 lines are skipped

  ## calculations
  # Rows inside the leveled range: a position in um divided by the scan
  # resolution (um per point) gives the row index. Rows past the end of the scan
  # come back as NA, which is why the statistics below drop NAs.
  row_idx <- round(x1 / resolution):round(x2 / resolution)
  # Both columns are scaled by 1e-4. Per the original author this converts the
  # data to microns; judging from the axis labels and `x_um` below, x ends up in
  # cm and Height in um -- TODO confirm against the instrument export format.
  profi_data_um <- profil[row_idx, 1:2] * 0.0001
  colnames(profi_data_um) <- c("x", "Height") # Modify column names
  profi_data_um$x_um <- profi_data_um$x * 1e4
  profi_data_um_full <- profil[, 1:2] * 0.0001 # Full profile, same scaling
  colnames(profi_data_um_full) <- c("x", "Height") # Modify column names

  # Calculate mean thickness and baseline values
  heights <- profi_data_um$Height
  data_cleaned <- na.omit(heights)
  thickness_mean <- mean(heights, na.rm = TRUE)
  thickness_std <- sd(heights, na.rm = TRUE)
  thickness_sem <- thickness_std / sqrt(length(data_cleaned))

  # quantmod::findPeaks()/findValleys() return the index of the first point
  # AFTER each extremum (documented, to avoid look-ahead bias in time series).
  # Shift by one so the height is read at the extremum itself.
  peaks_x <- findPeaks(heights, thresh = 0) - 1
  peaks <- heights[peaks_x]
  peaks2 <- peaks[peaks > peak_thresh] # keep only aggregates/unexfoliated flakes
  peaks_mean <- mean(peaks2)           # NaN when no peak exceeds the threshold
  peaks_sd <- sd(peaks2)

  lows_x <- findValleys(heights, thresh = 0) - 1
  lows <- heights[lows_x]
  # Exclude negative data (likely corresponding to pinholes) and shoulder peaks.
  # Without any peak above the threshold there is no upper bound to apply;
  # comparing against NaN used to turn the whole baseline into NA.
  lows_upper <- if (length(peaks2) > 0) peaks_mean else Inf
  lows2 <- lows[lows > 0 & lows <= lows_upper]

  # calculate RMS roughness (from Sarkani2019): sqrt(mean(Height^2)).
  # NAs are dropped, consistent with the mean and sd above.
  roughness <- sqrt(mean(data_cleaned^2))

  ## store results at index i
  sample_lst[[i]] <- sampleName_norange
  sampleFull_lst[[i]] <- sampleName
  thickness_lst[[i]] <- thickness_mean
  std_lst[[i]] <- thickness_std
  sem_lst[[i]] <- thickness_sem
  n_lst[[i]] <- length(data_cleaned)
  peaks_mean_lst[[i]] <- peaks_mean
  peaks_sd_lst[[i]] <- peaks_sd
  peaks_sem_lst[[i]] <- peaks_sd / sqrt(length(peaks2))
  npeaks_lst[[i]] <- length(peaks2)
  baseline_lst[[i]] <- mean(lows2)
  baseline_sd_lst[[i]] <- sd(lows2)
  baseline_sem_lst[[i]] <- sd(lows2) / sqrt(length(lows2))
  nbaseline_lst[[i]] <- length(lows2)
  roughness_rms_lst[[i]] <- roughness

  ## Plots of profile
  # Horizontal marker lines (baseline / mean / peaks mean) shared by both
  # "_hlines" plots. They are drawn once, from `line.data`, so that they get a
  # legend; the former extra geom_hline() layers duplicated them and passed a
  # string to `show.legend`, which expects a logical.
  line.data <- data.frame(
    yintercept = c(baseline_lst[[i]], thickness_lst[[i]], peaks_mean_lst[[i]]),
    Lines = c("Baseline", "Mean", "Peaks mean")
  )
  marker_lines <- geom_hline(
    aes(yintercept = yintercept, color = Lines, linetype = Lines),
    data = line.data, size = 1.5
  )
  line_colors <- scale_color_manual(values = cbPalette[2:length(cbPalette)])

  # Full profile with lines (x in cm)
  full_hlines_plot <- ggplot(data = profi_data_um_full, aes(x = x, y = Height)) +
    geom_line(size = 2) +
    marker_lines +
    ylim(0, 30) +
    # xlim(0,1.5) +
    xlab(TeX("x ($cm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    line_colors +
    labs(title = "") +
    theme_bw(base_size = 24)
  save_profile_pdf(full_hlines_plot, paste0(outfolder, sampleNameOnly, "_full_hlines.pdf"))

  # Leveled range profile with lines (x * 10 -> mm; x1, x2 are in um)
  leveled_hlines_plot <- ggplot(data = profi_data_um, aes(x = x * 10, y = Height)) +
    geom_line(size = 2) +
    marker_lines +
    ylim(0, 30) +
    xlim(x1 / 1000, x2 / 1000) +
    xlab(TeX("x ($mm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    line_colors +
    labs(title = "") +
    theme_bw(base_size = 24)
  save_profile_pdf(leveled_hlines_plot, paste0(outfolder, sampleNameOnly, "_hlines.pdf"))

  # # Leveled range profile with lines (x in cm) -- optional, disabled
  # leveled_hlines_cm_plot <- ggplot(data = profi_data_um, aes(x = x, y = Height)) +
  #   geom_line(size = 2) +
  #   marker_lines +
  #   ylim(0, 30) +
  #   xlim(x1 * 0.1, x2 * 0.1) + # NOTE(review): looks like it predates the switch to um in file names -- confirm before re-enabling
  #   xlab(TeX("x ($cm$)")) +
  #   ylab(TeX("Height ($\\mu m$)")) +
  #   line_colors +
  #   labs(title = "") +
  #   theme_bw(base_size = 24)
  # save_profile_pdf(leveled_hlines_cm_plot, paste0(outfolder, sampleNameOnly, "_hlines_cm.pdf"))

  # Full profile. The axis is labelled in mm, so x is multiplied by 10 exactly
  # as in the leveled plots (it used to plot the unscaled x under a mm label).
  full_plot <- ggplot(data = profi_data_um_full, aes(x = x * 10, y = Height)) +
    geom_line(size = 2) +
    ylim(0, 30) +
    xlab(TeX("x ($mm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    theme_bw(base_size = 24)
  save_profile_pdf(full_plot, paste0(outfolder, sampleNameOnly, "_full.pdf"))

  # Leveled range
  leveled_plot <- ggplot(data = profi_data_um, aes(x = x * 10, y = Height)) +
    geom_line(size = 2) +
    xlim(x1 / 1000, x2 / 1000) +
    ylim(0, 30) +
    xlab(TeX("x ($mm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    theme_bw(base_size = 24)
  save_profile_pdf(leveled_plot, paste0(outfolder, sampleNameOnly, "_leveled.pdf"))

  # # 60 x 60 um -- optional, disabled
  # um_plot <- ggplot(data = profi_data_um, aes(x = x_um, y = Height)) +
  #   geom_point(size = 2) +
  #   xlim(x1 * 1000, x1 * 1000 + 60) + # NOTE(review): looks like it predates the switch to um in file names -- confirm before re-enabling
  #   ylim(0, 60) +
  #   xlab(TeX("x ($\\mu m$)")) +
  #   ylab(TeX("Height ($\\mu m$)")) +
  #   theme_bw(base_size = 24)
  # save_profile_pdf(um_plot, paste0(outfolder, sampleNameOnly, "_um.pdf"), width = 7, height = 7)
}

## Combine multiple measurements of one sample if applicable
# Two consecutive files with the same range-less sample name are merged into one
# extra entry that is appended to every result list. With three or more
# consecutive measurements of one sample this yields one entry per adjacent
# pair, not a single overall entry.
library(rapport) # NOTE(review): nothing in this section uses rapport -- confirm it is still needed

# Size-weighted average of two values (weights n_a and n_b).
combine_weighted <- function(a, b, n_a, n_b) {
  (a * n_a + b * n_b) / (n_a + n_b)
}

# seq_len(n)[-1] is 2..n and empty for n <= 1; the former 2:length(files)
# counted down (2, 1) and crashed when there was only one file.
for (i in seq_len(length(files))[-1]) {
  prev <- i - 1

  # Skipped files leave NULL entries behind; comparing those (or NA names)
  # would make `if` fail, so only a definite match is combined.
  if (is.null(sample_lst[[prev]]) || is.null(sample_lst[[i]])) {
    next
  }
  if (!isTRUE(sample_lst[[prev]] == sample_lst[[i]])) {
    next
  }

  # calculations
  n_prev <- as.numeric(n_lst[[prev]])
  n_cur <- as.numeric(n_lst[[i]])
  n_tot <- n_prev + n_cur
  t_mean <- combine_weighted(thickness_lst[[prev]], thickness_lst[[i]], n_prev, n_cur)
  # NOTE(review): this is a weighted average of the two SDs, not a pooled SD of
  # the combined data -- kept as in the original, confirm this is intended.
  std_mean <- combine_weighted(std_lst[[prev]], std_lst[[i]], n_prev, n_cur)
  sem_mean <- std_mean / sqrt(n_tot)

  np_prev <- as.numeric(npeaks_lst[[prev]])
  np_cur <- as.numeric(npeaks_lst[[i]])
  n_peaks <- np_prev + np_cur
  peaks_mean <- combine_weighted(peaks_mean_lst[[prev]], peaks_mean_lst[[i]], np_prev, np_cur)
  peaks_sd_mean <- combine_weighted(peaks_sd_lst[[prev]], peaks_sd_lst[[i]], np_prev, np_cur)
  sem_peaks <- peaks_sd_mean / sqrt(n_peaks)

  nb_prev <- as.numeric(nbaseline_lst[[prev]])
  nb_cur <- as.numeric(nbaseline_lst[[i]])
  n_valleys <- nb_prev + nb_cur
  baseline_mean <- combine_weighted(baseline_lst[[prev]], baseline_lst[[i]], nb_prev, nb_cur)
  baseline_sd_mean <- combine_weighted(baseline_sd_lst[[prev]], baseline_sd_lst[[i]], nb_prev, nb_cur)
  sem_baseline <- baseline_sd_mean / sqrt(n_valleys)

  # NOTE(review): weighted average of the two RMS values, not the RMS of the
  # combined data -- kept as in the original, confirm this is intended.
  roughness <- combine_weighted(roughness_rms_lst[[prev]], roughness_rms_lst[[i]], n_prev, n_cur)

  # append data to relevant lists (the combined row is labelled with the range-less sample name)
  sampleFull_lst <- append(sampleFull_lst, sample_lst[i])
  thickness_lst <- append(thickness_lst, t_mean)
  std_lst <- append(std_lst, std_mean)
  sem_lst <- append(sem_lst, sem_mean)
  n_lst <- append(n_lst, n_tot)
  peaks_mean_lst <- append(peaks_mean_lst, peaks_mean)
  peaks_sd_lst <- append(peaks_sd_lst, peaks_sd_mean)
  peaks_sem_lst <- append(peaks_sem_lst, sem_peaks)
  npeaks_lst <- append(npeaks_lst, n_peaks)
  baseline_lst <- append(baseline_lst, baseline_mean)
  baseline_sd_lst <- append(baseline_sd_lst, baseline_sd_mean)
  baseline_sem_lst <- append(baseline_sem_lst, sem_baseline)
  nbaseline_lst <- append(nbaseline_lst, n_valleys)
  roughness_rms_lst <- append(roughness_rms_lst, roughness)
}

## Convert lists into dataframe columns
# unlist() drops the NULL entries left by skipped files; every list has its
# NULLs at the same positions, so the columns stay aligned.
df <- data.frame(
  Sample = unlist(sampleFull_lst),
  Thickness_mean = unlist(thickness_lst),
  Thickness_sd = unlist(std_lst),
  Thickness_sem = unlist(sem_lst),
  Number_points = unlist(n_lst),
  Peaks_mean = unlist(peaks_mean_lst),
  Peaks_sd = unlist(peaks_sd_lst),
  Peaks_sem = unlist(peaks_sem_lst),
  Number_peak_points = unlist(npeaks_lst),
  Baseline_mean = unlist(baseline_lst),
  Baseline_sd = unlist(baseline_sd_lst),
  Baseline_sem = unlist(baseline_sem_lst),
  Number_baseline_points = unlist(nbaseline_lst),
  RMS_roughness = unlist(roughness_rms_lst)
)
# print(df)

## Write to csv
# paste0, not paste: paste() joins with a space by default, which wrote the
# file as " statistics.csv" (leading space) inside the output folder.
write.table(df, paste0(outfolder, "statistics.csv"),
            dec = ".", sep = ",", col.names = TRUE, row.names = FALSE)
